
This project analyzes shooting incidents in New York City with a focus on geographic and location-based patterns. The analysis uses two NYPD datasets: the historic dataset covering incidents from 2015 through 2024, and the 2025 year-to-date dataset providing the most recent records. Together, these data allow examination of long-term patterns and current trends in shootings across boroughs and location types.
Note: The analysis is limited to the most recent period (2015 through 2025 year-to-date, i.e., ten full years plus the partial current year) to optimize file size for web-based visualization while still capturing meaningful temporal patterns.
The analysis includes descriptive summaries of borough-level shooting counts, location-based breakdowns (inside versus outside), and a Poisson regression model evaluating whether shooting counts vary by location type and victim race across boroughs. These approaches provide insight into how environmental and demographic factors relate to shooting incidents in New York City.
library(tidyverse)
library(tidycensus)
library(sf)
library(mapview)
library(knitr)
library(kableExtra)
library(broom)
library(tigris)
library(leaflet)
options(tigris_use_cache = TRUE, tigris_progress = FALSE)
# Register the Census API key for this session.
#
# The key is read from the CENSUS_API_KEY environment variable (for example,
# set once in ~/.Renviron) instead of being written into this file, so the
# secret is not distributed with the analysis. Dropping `install = TRUE,
# overwrite = TRUE` also stops every run from rewriting the user's .Renviron.
# NOTE(review): the key that used to be hard-coded here should be treated as
# exposed and replaced with a freshly issued one.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (nzchar(census_key)) {
  census_api_key(census_key) |>
    invisible()
} else {
  # Warn rather than stop: the tigris boundary download below does not use
  # the key, so the rest of the document can still be rendered.
  warning(
    "CENSUS_API_KEY is not set; tidycensus requests that need a key may fail. ",
    "Request a key at https://api.census.gov/data/key_signup.html and add ",
    "CENSUS_API_KEY=<your key> to ~/.Renviron.",
    call. = FALSE
  )
}
# Load the two NYPD extracts: the historic file and the 2025 file.
dat1 <- read.csv("./Data folder/Shooting_Historic.csv")
dat2 <- read.csv("./Data folder/Shooting_2025.csv")

# Historic records: take the year from the last four characters of OCCUR_DATE,
# keep 2015 onward, and retain only the first row for each INCIDENT_KEY.
dat1 <- dat1 |>
  mutate(year = as.numeric(stringr::str_sub(OCCUR_DATE, start = -4L))) |>
  filter(year >= 2015) |>
  distinct(INCIDENT_KEY, .keep_all = TRUE)

# Stack both sources, tagging every row with the file it came from, then keep
# only the first row for each INCIDENT_KEY in the combined data.
dat_all <- list(
  mutate(dat1, source = "historic"),
  mutate(dat2, source = "y2025")
) |>
  bind_rows() |>
  distinct(INCIDENT_KEY, .keep_all = TRUE)
# Add a `location_type` column derived from LOC_OF_OCCUR_DESC.
#
# Rows whose LOC_OF_OCCUR_DESC is exactly "INSIDE" or "OUTSIDE" are labelled
# "Inside" / "Outside"; every other value, including missing ones, becomes
# "Unknown".
add_location_type <- function(df) {
  df |>
    mutate(
      location_type = case_when(
        LOC_OF_OCCUR_DESC == "INSIDE" ~ "Inside",
        LOC_OF_OCCUR_DESC == "OUTSIDE" ~ "Outside",
        TRUE ~ "Unknown"
      )
    )
}

# Apply the same labelling to the combined data and to the historic data.
dat_all <- add_location_type(dat_all)
dat1 <- add_location_type(dat1)
# Point layer for mapping: drop rows whose coordinates are missing or zero,
# then convert Longitude/Latitude into sf point geometry (EPSG:4326).
dat_all_sf <- dat_all |>
  filter(
    !is.na(Latitude),
    !is.na(Longitude),
    Latitude != 0,
    Longitude != 0
  ) |>
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)
This section presents descriptive summaries of shooting incidents across the five boroughs and by location type. It focuses on understanding basic patterns in counts and distributions before moving into spatial visualization and regression modeling.
This subsection summarizes the total number of shooting incidents for each borough using the historic dataset from 2015 to 2024.
# Number of incidents per borough in the historic data (one row per BORO).
dat1_boro <- dat1 |>
  as_tibble() |>
  count(BORO, name = "n_total")

# Display table: counts are shown with thousands separators.
dat1_boro |>
  mutate(n_total = format(n_total, big.mark = ",")) |>
  knitr::kable(
    caption = "Shooting Incident Counts by Borough (2015 to 2024)",
    col.names = c("Borough", "Total Shootings"),
    align = c("l", "r")
  ) |>
  kableExtra::kable_styling(position = "center", full_width = FALSE)
| Borough | Total Shootings |
|---|---|
| BRONX | 3,202 |
| BROOKLYN | 4,028 |
| MANHATTAN | 1,577 |
| QUEENS | 1,602 |
| STATEN ISLAND | 311 |
This subsection presents shooting incident counts for each borough using the full dataset, which combines the 2015 to 2024 historic records with the 2025 year-to-date records.
# Number of incidents per borough in the combined data (one row per BORO).
dat_all_boro <- dat_all |>
  as_tibble() |>
  count(BORO, name = "n_total")

# Display table: borough names in title case, counts with thousands
# separators. str_to_title() already lower-cases the non-initial letters, so
# no separate lower-casing step is needed.
dat_all_boro |>
  mutate(
    BORO = stringr::str_to_title(BORO),
    n_total = format(n_total, big.mark = ",")
  ) |>
  knitr::kable(
    caption = "Shooting Incident Counts by Borough (All Years Including 2025)",
    col.names = c("Borough", "Total Shootings"),
    align = c("l", "r")
  ) |>
  kableExtra::kable_styling(position = "center", full_width = FALSE)
| Borough | Total Shootings |
|---|---|
| Bronx | 3,410 |
| Brooklyn | 4,225 |
| Manhattan | 1,646 |
| Queens | 1,673 |
| Staten Island | 319 |
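This subsection examines the distribution of shooting incidents by location type (inside buildings versus outside) for the historic period. Incidents whose location type is not recorded as inside or outside are excluded, so the totals here are smaller than the borough totals above.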
# Historic incidents by location type, leaving out the "Unknown" label.
dat1_location <- dat1 |>
  as_tibble() |>
  filter(location_type != "Unknown") |>
  count(location_type, name = "n_total")

# Display table: counts are shown with thousands separators.
dat1_location |>
  mutate(n_total = format(n_total, big.mark = ",")) |>
  knitr::kable(
    caption = "Shooting Incidents by Location Type (2015 to 2024)",
    col.names = c("Location", "Total Shootings"),
    align = c("l", "r")
  ) |>
  kableExtra::kable_styling(position = "center", full_width = FALSE)
| Location | Total Shootings |
|---|---|
| Inside | 500 |
| Outside | 2,672 |
This subsection presents the distribution of shooting incidents by location type using the full dataset.
# Combined-data incidents by location type, leaving out the "Unknown" label.
dat_all_location <- dat_all |>
  as_tibble() |>
  filter(location_type != "Unknown") |>
  count(location_type, name = "n_total")

# Display table: counts are shown with thousands separators.
dat_all_location |>
  mutate(n_total = format(n_total, big.mark = ",")) |>
  knitr::kable(
    caption = "Shooting Incidents by Location Type (All Years Including 2025)",
    col.names = c("Location", "Total Shootings"),
    align = c("l", "r")
  ) |>
  kableExtra::kable_styling(position = "center", full_width = FALSE)
| Location | Total Shootings |
|---|---|
| Inside | 556 |
| Outside | 3,169 |
This subsection cross-tabulates shooting incidents by borough and location type for the full dataset.
# Borough x location-type cross-tabulation for the combined data.
# Rows labelled "Unknown" are left out; a borough with no incidents of a given
# type gets 0 rather than NA.
dat_all_loc_boro <- dat_all |>
  filter(location_type != "Unknown") |>
  group_by(BORO, location_type) |>
  summarise(
    n_total = n(),
    .groups = "drop"
  ) |>
  pivot_wider(
    names_from = location_type,
    values_from = n_total,
    values_fill = 0
  ) |>
  # pivot_wider() orders the new columns by first appearance in the data, but
  # the table header below is assigned by position. Fix the order explicitly
  # so the "Inside"/"Outside" labels can never end up on the wrong column
  # (and fail loudly if either column is absent).
  select(BORO, Inside, Outside)

# Display table with borough names in title case.
dat_all_loc_boro |>
  mutate(BORO = stringr::str_to_title(stringr::str_to_lower(BORO))) |>
  kable(
    col.names = c("Borough", "Inside", "Outside"),
    align = c("l", "r", "r"),
    caption = "Shooting Incidents by Borough and Location Type (All Years)"
  ) |>
  kable_styling(full_width = FALSE, position = "center")
| Borough | Inside | Outside |
|---|---|---|
| Bronx | 174 | 1096 |
| Brooklyn | 224 | 1047 |
| Manhattan | 71 | 516 |
| Queens | 68 | 451 |
| Staten Island | 19 | 59 |
This section presents interactive maps that visually display geographic differences in shooting incidents across boroughs. Each point represents an individual shooting incident.
This map shows all shooting incidents from 2015 through 2025 as scatter points overlaid on borough boundaries.
# New York State county boundaries, restricted to the five counties whose
# names are matched to NYC boroughs further below.
nyc_boro <- tigris::counties(state = "NY", cb = TRUE) |>
  filter(NAME %in% c("Bronx", "Kings", "New York", "Queens", "Richmond"))
# Add a `county_name` column giving the county that corresponds to each
# upper-case borough name in `BORO`.
#
# df: a data frame with a `BORO` column.
# Returns `df` with `county_name` added; any BORO value other than the five
# listed below gets NA.
boro_to_county <- function(df) {
  county_lookup <- c(
    "MANHATTAN" = "New York",
    "BROOKLYN" = "Kings",
    "BRONX" = "Bronx",
    "QUEENS" = "Queens",
    "STATEN ISLAND" = "Richmond"
  )
  df |>
    # as.character() keeps the lookup name-based even if BORO is a factor;
    # unname() keeps the new column a plain character vector.
    mutate(county_name = unname(county_lookup[as.character(BORO)]))
}
# Attach county names to the borough totals so they can be matched to the
# county boundary layer.
dat_all_boro <- boro_to_county(dat_all_boro)

# One polygon per borough, carrying its total incident count (n_total).
nyc_boro_map_all <- dplyr::left_join(
  nyc_boro,
  dat_all_boro,
  by = c("NAME" = "county_name")
)

# Two-layer interactive map: semi-transparent borough polygons coloured by
# total count, with the individual incidents drawn on top as small red points.
mapview::mapview(
  nyc_boro_map_all,
  zcol = "n_total",
  alpha.regions = 0.3,
  layer.name = "Borough Totals"
) +
  mapview::mapview(
    dat_all_sf,
    col.regions = "red",
    cex = 1,
    alpha = 0.5,
    layer.name = "All Shootings"
  )